---
title: "survey-analysis.qmd"
author: "Francesco Garassino"
format: html
editor: visual
editor_options: 
  chunk_output_type: console
---

```{r setup}
#| echo: false
#| warning: false 
# Load, through easypackages, the packages whose functions are called
# unqualified in the chunks below.
easypackages::packages("cowplot", "RColorBrewer", "tidyverse")

# Switch to the project folder unless the current working directory path
# already contains its name; the "inputs/" and "outputs/" paths used in this
# document are relative to that folder.
# NOTE(review): when rendered with knitr, a setwd() made inside a chunk may be
# reset once the chunk ends (see the `root.dir` option) - confirm this works
# outside an interactive console session.
already_in_project <- grepl("Analysis_stakeholders_WP4.3", getwd())
if (!already_in_project) {
  setwd("./Analysis_stakeholders_WP4.3/")
}

# Timestamp fragments (dd_mm_yy and HH_MM) appended to output file names.
curr_date <- format(Sys.time(), "%d_%m_%y")
curr_time <- format(Sys.time(), "%H_%M")
```

```{r get-data}

# Read the semicolon-delimited survey export and discard the columns that
# hold no data at all.
data_path <- "./inputs/3-AFFORD_survey_responses.csv"

survey_results <- janitor::remove_empty(
  read_delim(data_path, delim = ";"),
  which = "cols"
)

# Positional renaming: the nine remaining columns are relabelled in file order
# (response id, six question columns, role, comments).
names(survey_results) <- c(
  "response_id",
  paste0("q", 1:6),
  "role",
  "comments"
)
```

```{r responders-type}

# Roles that keep their own category; every other value (a missing role
# included) is pooled under "Other".
listed_roles <- c(
  "Postdoctoral Researcher", "PhD Candidate", "Associate Professor",
  "Scientific staff", "Full Professor", "Assistant Professor",
  "Senior Assistant"
)

# Number of respondents per (recoded) role.
survey_respondents <- survey_results %>%
  mutate(role = case_when(
    role %in% listed_roles ~ role, # keep the original role when it is listed
    TRUE ~ "Other"
  )) %>%
  count(role, name = "n_respondents")

total_respondents <- nrow(survey_results)
is_phd_row <- survey_respondents$role == "PhD Candidate"

# PhD candidates: count and share of all survey rows.
# Note: the "pct_" values are fractions (0-1), not percentages.
n_respondents_phd <- survey_respondents$n_respondents[is_phd_row]
pct_respondents_phd <- n_respondents_phd / total_respondents

# Everyone else: summed count and share of all survey rows.
n_respondents_non_phd <- sum(survey_respondents$n_respondents[!is_phd_row])
pct_respondents_non_phd <- n_respondents_non_phd / total_respondents

print(c(n_respondents_phd, n_respondents_non_phd))
print(c(pct_respondents_phd, pct_respondents_non_phd))
```

```{r extract-comments}

# Responses that carry a comment, with the response id and role kept
# alongside the comment text.
survey_comments <- survey_results %>%
  drop_na(comments) %>%
  select(response_id, role, comments)

# Export only the comment text (neither id nor role) to a timestamped CSV.
comments_file <- paste0(
  "outputs/table_comments_", curr_date, "_", curr_time, ".csv"
)
write_csv(select(survey_comments, comments), comments_file)

```

```{r get-key}
# Key table read from a semicolon-delimited file; the later chunks use its
# `question` column and columns 2-5 (the service names of each question).
key_path <- "./inputs/4-AFFORD_survey_questions_levels_key.csv"

# Rows with any missing value are dropped, which also removes empty rows.
key_tib <- drop_na(read_delim(key_path, delim = ";"))

```

```{r answers-tables}

# Tabulate, for every question column (q1, q2, ...), how often each service
# was placed at each position of the respondents' answers, and derive
# position-weighted scores. Results are stored per question, under the
# question name looked up in key_tib:
#   tables_list - counts: one row per position (1-4), one column per service
#   scores_list - the same counts multiplied by their position
tables_list <- list()
scores_list <- list()

q_columns <- survey_results %>%
  select(matches("q\\d")) %>%
  colnames()

# Some services were renamed while the survey was open. For each affected
# question column: old label (regex pattern) = current label.
renamed_services <- list(
  q1 = c("Lab data hub|Lab research portal" = "Lab research data portal"),
  # '\\b' marks word boundaries, so the already-correct "websites" is untouched
  q2 = c("\\bInformation on UZH website\\b" = "Information on UZH websites"),
  q3 = c(
    "Own data repository" = "UZH data repository",
    "Repositories navigation guide" = "Repositories decision tool",
    "Submission instructions" = "Submission tutorials"
  ),
  q5 = c("Mandatory ORD training" = "Mandatory training"),
  q6 = c("Lab-wide consulting" = "Lab-wide support")
)

for (name in q_columns) {
  # One character vector per respondent holding the option names in the order
  # given: drop the trailing ";" (it would yield an extra, empty field), split
  # the cell on ";" and strip the explanation that follows each ":".
  q_results <- sub(";$", "", survey_results[[name]]) %>%
    str_split(pattern = ";") %>%
    lapply(function(x) sub(":.+", "", x))

  # Apply the renamings registered for this column (none for e.g. q4).
  renames <- renamed_services[[name]]
  for (old_label in names(renames)) {
    q_results <- lapply(q_results, gsub,
                        pattern = old_label,
                        replacement = renames[[old_label]])
  }

  q_options <- q_results[[1]] # option names, taken from the first respondent

  # Identify the question: the key_tib row whose services (columns 2-5)
  # contain every option of this column.
  key_row <- NA_integer_
  for (row_idx in seq_len(nrow(key_tib))) {
    row_services <- as.character(key_tib[row_idx, 2:5])
    option_found <- vapply(
      q_options,
      function(opt) any(grepl(opt, row_services)),
      logical(1)
    )
    if (isTRUE(all(option_found))) {
      key_row <- row_idx
      break
    }
  }
  if (is.na(key_row)) {
    # Fail loudly instead of falling through to an arbitrary key_tib row,
    # which would file the counts under the wrong question.
    stop("No row of key_tib matches the options of column '", name, "': ",
         paste(q_options, collapse = ", "),
         call. = FALSE)
  }

  question_name <- as.character(key_tib[key_row, 1]) # name/category of the question
  col_names <- as.character(key_tib[key_row, 2:5]) # services, in key_tib order

  # Zero-filled results tibble: one column per service (names cleaned by
  # janitor), four rows (one per position in the answer).
  counts_tib <- tibble(!!!setNames(rep(list(0), length(col_names)), col_names)) %>%
    janitor::clean_names() %>%
    slice(rep(1, 4))

  for (r in q_results) { # per respondent
    for (n in seq_along(r)) { # per position in the respondent's answer
      # mimic the column-name cleaning: lower case, " " and "-" become "_"
      string <- str_replace_all(tolower(r[n]), " ", "_")
      string <- str_replace_all(string, "-", "_")
      # which service column(s) does this option correspond to?
      elem_index <- grep(string, colnames(counts_tib))
      if (length(elem_index) == 0) {
        warning("Option '", r[n], "' of column '", name,
                "' matches no service of question '", question_name,
                "' and was not counted.",
                call. = FALSE)
        next
      }
      # add 1 to the cell for this service at this position
      counts_tib[n, elem_index] <- counts_tib[n, elem_index] + 1
    }
  }

  tables_list[[question_name]] <- counts_tib

  # Weight every count by its row index (position 1 -> x1, ..., 4 -> x4).
  scores_tib <- counts_tib %>%
    mutate(rank = row_number()) %>%
    mutate(across(!rank, ~ .x * rank)) %>%
    select(!rank)

  scores_list[[question_name]] <- scores_tib
}

```

```{r vis_responses_bars}
# Stacked bar plots (one panel per question) of how often each service was
# given each preference level, plus a table of the services ordered by score.
max_counts <- map(tables_list, ~ max(unlist(.)))
max_counts_all <- max(unlist(max_counts))

# Number of responses, taken as the number of times "dedicated_website" was
# counted (at any position) in the "info" question.
n_responses <- tables_list$info %>%
  select(dedicated_website) %>%
  sum()

# define order of questions and reorder list of tables accordingly
q_order <- c("info", "ste", "IT", "shar", "comm", "train") # As in questionnaire
missing_questions <- setdiff(q_order, names(tables_list))
if (length(missing_questions) > 0) {
  stop("Questions missing from tables_list: ",
       paste(missing_questions, collapse = ", "),
       call. = FALSE)
}
tables_list <- tables_list[q_order]
scores_list <- scores_list[q_order]

# initialize list to store plots
barplots_list <- list()

# initialize tibble to store ranks
ranks_tib <- tibble(area = character(0),
                    r1 = character(0),
                    r2 = character(0),
                    r3 = character(0),
                    r4 = character(0))

for (i in seq_along(tables_list)) {
  question_name <- names(tables_list)[i]
  print(question_name)

  # display names of this question's services (key_tib columns 2-5)
  service_labels <- key_tib %>%
    filter(question == question_name) %>%
    select(2:5)

  # Order the services by ascending total score. The scores weight each count
  # by its position in the answer, so the smallest total comes first
  # (presumably the most preferred service - position 1 being the top choice).
  service_totals <- colSums(scores_list[[i]])
  order_services <- names(service_totals)[order(service_totals)]

  # Map the ordered column names back to display names by applying the same
  # transformation (lower case, " " and "-" to "_") to the key_tib labels.
  label_keys <- tolower(gsub(" |-", "_", as.character(service_labels)))
  label_order <- match(order_services, label_keys)
  if (anyNA(label_order)) {
    stop("Could not match services to key_tib labels for question '",
         question_name, "': ",
         paste(order_services[is.na(label_order)], collapse = ", "),
         call. = FALSE)
  }

  labs_ordered <- service_labels %>%
    # shorten specific services' names
    mutate(l1 = ifelse(l1 == "Data Management Plan support", "DMP support", l1)) %>%
    as.character()
  labs_ordered <- labs_ordered[label_order]

  ranks_tib <- ranks_tib %>%
    bind_rows(tibble(area = question_name,
                     r1 = labs_ordered[1],
                     r2 = labs_ordered[2],
                     r3 = labs_ordered[3],
                     r4 = labs_ordered[4]))

  # Long format: one row per (preference level, service) with a non-zero
  # count. Factor levels are reversed because coord_flip() draws the first
  # level at the bottom.
  p_data <- tables_list[[i]] %>%
    mutate(preference_level = row_number()) %>%
    pivot_longer(!preference_level, names_to = "service", values_to = "n_scored") %>%
    mutate(service = factor(service,
                            levels = rev(order_services),
                            labels = rev(labs_ordered)),
           preference_level = factor(preference_level,
                                     levels = c(1, 2, 3, 4))) %>%
    filter(n_scored != 0)

  bp <- p_data %>%
    ggplot(aes(y = n_scored, x = service, fill = preference_level)) +
    geom_bar(position = "stack", stat = "identity", colour = "black") +
    scale_y_continuous(limits = c(0, n_responses),
                       breaks = seq(5, 115, by = 10),
                       expand = c(0, 0)) +
    scale_fill_brewer(palette = "YlGn", direction = -1) +
    labs(
      y = "n scored",
      fill = "Preference Level" # colour legend title
    ) +
    # dashed reference line at half the number of responses
    geom_hline(yintercept = round(n_responses / 2),
               linetype = 2,
               linewidth = .5) +
    coord_flip() +
    theme_minimal(base_size = 12) +
    theme(
      plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), units = "cm"),
      panel.grid.major.x = element_line(color = "grey40", linewidth = 0.5, linetype = 1),
      panel.grid.minor = element_blank(),
      aspect.ratio = .5,
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
      axis.title.x = element_text(size = 11),
      axis.title.y = element_blank()
    )

  barplots_list[[i]] <- bp
}

comp_barplot <- ggpubr::ggarrange(plotlist = barplots_list,
                                  legend = "bottom",
                                  ncol = 2,
                                  nrow = 3,
                                  align = "v",
                                  common.legend = TRUE,
                                  labels = "AUTO")

# Save the composite figure explicitly rather than relying on ggsave()'s
# default of the last plot created.
barplot_file_stub <- paste0("outputs/barplot_all-responses_", curr_date, "_", curr_time)

ggsave(paste0(barplot_file_stub, ".png"),
       plot = comp_barplot,
       width = 10,
       height = 7,
       units = "in")

ggsave(paste0(barplot_file_stub, ".svg"),
       plot = comp_barplot,
       width = 10,
       height = 7,
       units = "in")

# export table with ranking of services
write_csv(ranks_tib,
          paste0("outputs/table_all-responses_ordered_", curr_date, "_", curr_time, ".csv"))
```

```{r rank-visualisation}

# Tile plot: for each question area (x) and rank (y), the key_tib level
# (l1-l4) of the service that obtained that rank.

# one row per (area, rank) with the service ranked there
ranks_tib_long <- ranks_tib %>%
  pivot_longer(cols = starts_with("r"), names_to = "rank", values_to = "service")

# one row per (question, level) with the service name, shortened as in ranks_tib
key_tib_long <- key_tib %>%
  mutate(l1 = ifelse(l1 == "Data Management Plan support", "DMP support", l1)) %>%
  pivot_longer(cols = starts_with("l"), names_to = "level", values_to = "service")

rank_codes <- unique(ranks_tib_long$rank)

ranks_tib_plot <- ranks_tib_long %>%
  # Join on question AND service: joining on the service name alone would
  # duplicate rows if the same label were used under two questions.
  left_join(key_tib_long, by = c("area" = "question", "service")) %>%
  select(area, rank, service, level) %>%
  mutate(
    # reversed levels so that rank 1 is drawn at the top of the y axis;
    # labels drop the "r" prefix
    rank = factor(rank,
                  levels = rev(rank_codes),
                  labels = rev(gsub("r", "", rank_codes))),
    area = factor(area,
                  levels = q_order,
                  labels = c("Online information",
                             "Stewardship",
                             "IT support",
                             "Data sharing",
                             "Community",
                             "Training")),
    # "l1" -> "1", etc.
    level = gsub("l", "", level)
  )

tile_plot <- ranks_tib_plot %>%
  ggplot(aes(x = area, y = rank, fill = level)) +
  geom_tile(color = "black",
            lwd = .5,
            linetype = 1) +
  coord_fixed() +
  labs(fill = "Resources' availability level") +
  scale_fill_brewer(palette = "YlOrBr", direction = 1) +
  theme_minimal(base_size = 12) +
  theme(
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), units = "cm"),
    axis.text.x = element_text(size = 10.5, angle = 45, hjust = .9),
    axis.title.x = element_blank(),
    legend.position = "bottom")

# Pass the plot explicitly instead of relying on ggsave()'s last-plot default.
ggsave(paste0("outputs/tile_plot_rankings_all-data_", curr_date, "_", curr_time, ".png"),
       plot = tile_plot,
       width = 5,
       height = 3.33,
       units = "in")

```

```{r stratify}
# Repeat the tabulation, scoring and bar plots separately for PhD candidates
# and for the other respondents, and collect the per-stratum service rankings.

# initialize tibble to store ranks
ranks_tib <- tibble(area = character(0),
                    strat = character(0),
                    r1 = character(0),
                    r2 = character(0),
                    r3 = character(0),
                    r4 = character(0))

# stratify for type of respondent (PhD vs non-PhD); each entry is a filter
# expression evaluated on survey_results
# NOTE(review): both expressions evaluate to NA for a missing role, so such
# respondents end up in neither stratum - confirm this is intended.
strat_options <- c("role == 'PhD Candidate'", "role != 'PhD Candidate'")

# Denominator for the share of each stratum. It is fixed before the loop
# because n_responses is reassigned to the stratum size inside the loop.
n_responses_all <- nrow(survey_results)

# One timestamp for all files written by this chunk.
curr_date <- format(Sys.time(), "%d_%m_%y")
curr_time <- format(Sys.time(), "%H_%M")

# Some services were renamed while the survey was open. For each affected
# question column: old label (regex pattern) = current label.
renamed_services <- list(
  q1 = c("Lab data hub|Lab research portal" = "Lab research data portal"),
  # '\\b' marks word boundaries, so the already-correct "websites" is untouched
  q2 = c("\\bInformation on UZH website\\b" = "Information on UZH websites"),
  q3 = c(
    "Own data repository" = "UZH data repository",
    "Repositories navigation guide" = "Repositories decision tool",
    "Submission instructions" = "Submission tutorials"
  ),
  q5 = c("Mandatory ORD training" = "Mandatory training"),
  q6 = c("Lab-wide consulting" = "Lab-wide support")
)

# Reshape a counts table (rows = preference levels, columns = services) into
# one row per (preference level, service) with a non-zero count; `service`
# becomes a factor with the given levels (column names) and display labels.
long_counts <- function(counts, service_levels, service_labels) {
  counts %>%
    mutate(preference_level = row_number()) %>%
    pivot_longer(!preference_level, names_to = "service", values_to = "n_scored") %>%
    mutate(service = factor(service,
                            levels = service_levels,
                            labels = service_labels),
           preference_level = factor(preference_level,
                                     levels = c(1, 2, 3, 4))) %>%
    filter(n_scored != 0)
}

# Horizontal stacked bar plot of counts per service, filled by preference
# level; `y_max` is the upper limit of the count axis.
build_barplot <- function(plot_data, y_max) {
  ggplot(plot_data, aes(y = n_scored, x = service, fill = preference_level)) +
    geom_bar(position = "stack", stat = "identity", colour = "black") +
    scale_y_continuous(limits = c(0, y_max),
                       breaks = seq(5, 115, by = 10),
                       expand = c(0, 0)) +
    scale_fill_brewer(palette = "YlGn", direction = -1) +
    labs(
      y = "n scored",
      fill = "Preference Level" # colour legend title
    ) +
    coord_flip() +
    theme_minimal(base_size = 12) +
    theme(
      plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), units = "cm"),
      panel.grid.major.x = element_line(color = "grey40", linewidth = 0.5, linetype = 1),
      panel.grid.minor = element_blank(),
      aspect.ratio = .5,
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 10),
      axis.title.x = element_text(size = 11),
      axis.title.y = element_blank()
    )
}

# Arrange the per-question plots in a 2 x 3 grid with a shared legend.
arrange_barplots <- function(plots) {
  ggpubr::ggarrange(plotlist = plots,
                    legend = "bottom",
                    ncol = 2,
                    nrow = 3,
                    align = "v",
                    common.legend = TRUE,
                    labels = "AUTO")
}

for (o in strat_options) {
  survey_results_strat <- survey_results %>%
    filter(!!rlang::parse_expr(o))

  n_responses_strat <- nrow(survey_results_strat)
  pct_responses_strat <- n_responses_strat / n_responses_all * 100

  # label of the stratum, derived from the comparison operator in the filter
  role <- if (grepl("==", o)) "PhD Candidate" else "non-PhD Candidate"

  print(paste0(role, ": ", n_responses_strat, " responses, ", round(pct_responses_strat, 1), "%"))

  tables_list <- list()
  scores_list <- list()

  q_columns <- survey_results_strat %>%
    select(matches("q\\d")) %>%
    colnames()

  for (name in q_columns) {
    # One character vector per respondent holding the option names in the
    # order given: drop the trailing ";" (it would yield an extra, empty
    # field), split the cell on ";" and strip the explanation after each ":".
    q_results <- sub(";$", "", survey_results_strat[[name]]) %>%
      str_split(pattern = ";") %>%
      lapply(function(x) sub(":.+", "", x))

    # Apply the renamings registered for this column (none for e.g. q4).
    renames <- renamed_services[[name]]
    for (old_label in names(renames)) {
      q_results <- lapply(q_results, gsub,
                          pattern = old_label,
                          replacement = renames[[old_label]])
    }

    q_options <- q_results[[1]] # option names, taken from the first respondent

    # Identify the question: the key_tib row whose services (columns 2-5)
    # contain every option of this column.
    key_row <- NA_integer_
    for (row_idx in seq_len(nrow(key_tib))) {
      row_services <- as.character(key_tib[row_idx, 2:5])
      option_found <- vapply(
        q_options,
        function(opt) any(grepl(opt, row_services)),
        logical(1)
      )
      if (isTRUE(all(option_found))) {
        key_row <- row_idx
        break
      }
    }
    if (is.na(key_row)) {
      # Fail loudly instead of falling through to an arbitrary key_tib row,
      # which would file the counts under the wrong question.
      stop("No row of key_tib matches the options of column '", name, "': ",
           paste(q_options, collapse = ", "),
           call. = FALSE)
    }

    question_name <- as.character(key_tib[key_row, 1]) # name/category of the question
    col_names <- as.character(key_tib[key_row, 2:5]) # services, in key_tib order

    # Zero-filled results tibble: one column per service (names cleaned by
    # janitor), four rows (one per position in the answer).
    counts_tib <- tibble(!!!setNames(rep(list(0), length(col_names)), col_names)) %>%
      janitor::clean_names() %>%
      slice(rep(1, 4))

    for (r in q_results) { # per respondent
      for (n in seq_along(r)) { # per position in the respondent's answer
        # mimic the column-name cleaning: lower case, " " and "-" become "_"
        string <- str_replace_all(tolower(r[n]), " ", "_")
        string <- str_replace_all(string, "-", "_")
        # which service column(s) does this option correspond to?
        elem_index <- grep(string, colnames(counts_tib))
        if (length(elem_index) == 0) {
          warning("Option '", r[n], "' of column '", name,
                  "' matches no service of question '", question_name,
                  "' and was not counted.",
                  call. = FALSE)
          next
        }
        # add 1 to the cell for this service at this position
        counts_tib[n, elem_index] <- counts_tib[n, elem_index] + 1
      }
    }

    tables_list[[question_name]] <- counts_tib

    # Weight every count by its row index (position 1 -> x1, ..., 4 -> x4).
    scores_tib <- counts_tib %>%
      mutate(rank = row_number()) %>%
      mutate(across(!rank, ~ .x * rank)) %>%
      select(!rank)

    scores_list[[question_name]] <- scores_tib
  }

  max_counts <- map(tables_list, ~ max(unlist(.)))
  max_counts_all <- max(unlist(max_counts))

  # Number of responses in this stratum, taken as the number of times
  # "dedicated_website" was counted (at any position) in the "info" question.
  n_responses <- tables_list$info %>%
    select(dedicated_website) %>%
    sum()

  # define order of questions and reorder list of tables accordingly
  q_order <- c("info", "ste", "IT", "shar", "comm", "train") # As in questionnaire
  tables_list <- tables_list[q_order]
  scores_list <- scores_list[q_order]

  # initialize list to store plots
  barplots_list <- list()
  # initialize list to store ordered plots
  barplots_ordered_list <- list()

  for (i in seq_along(tables_list)) {
    question_name <- names(tables_list)[i]
    print(question_name)

    # display names of this question's services (key_tib columns 2-5)
    service_labels <- key_tib %>%
      filter(question == question_name) %>%
      select(2:5)

    # Order the services by ascending total score. The scores weight each
    # count by its position in the answer, so the smallest total comes first
    # (presumably the most preferred service - position 1 being the top choice).
    service_totals <- colSums(scores_list[[i]])
    order_services <- names(service_totals)[order(service_totals)]

    # Map the ordered column names back to display names by applying the same
    # transformation (lower case, " " and "-" to "_") to the key_tib labels.
    label_keys <- tolower(gsub(" |-", "_", as.character(service_labels)))
    label_order <- match(order_services, label_keys)
    if (anyNA(label_order)) {
      stop("Could not match services to key_tib labels for question '",
           question_name, "': ",
           paste(order_services[is.na(label_order)], collapse = ", "),
           call. = FALSE)
    }

    service_labels <- service_labels %>%
      # shorten specific services' names
      mutate(l1 = ifelse(l1 == "Data Management Plan support", "DMP support", l1))
    labs_ordered <- as.character(service_labels)[label_order]

    # store ranked services in tibble
    ranks_tib <- ranks_tib %>%
      bind_rows(tibble(area = question_name,
                       strat = role,
                       r1 = labs_ordered[1],
                       r2 = labs_ordered[2],
                       r3 = labs_ordered[3],
                       r4 = labs_ordered[4]))

    # services in key_tib order
    p_data <- long_counts(tables_list[[i]],
                          colnames(tables_list[[i]]),
                          as.character(service_labels))
    bp <- build_barplot(p_data, n_responses)

    # services ordered by score; levels are reversed because coord_flip()
    # draws the first level at the bottom
    p_data_ordered <- long_counts(tables_list[[i]],
                                  rev(order_services),
                                  rev(labs_ordered))
    bp_ordered <- build_barplot(p_data_ordered, n_responses)

    barplots_list[[i]] <- bp
    barplots_ordered_list[[i]] <- bp_ordered
  }

  # Composite figures are passed to ggsave() explicitly rather than relying
  # on its default of the last plot created.
  comp_barplot <- arrange_barplots(barplots_list)

  ggsave(paste0("outputs/barplot_responses_", role, "_", curr_date, "_", curr_time, ".png"),
         plot = comp_barplot,
         width = 10,
         height = 7,
         units = "in")

  comp_barplot_ordered <- arrange_barplots(barplots_ordered_list)

  ggsave(paste0("outputs/barplot_responses_ordered_", role, "_", curr_date, "_", curr_time, ".png"),
         plot = comp_barplot_ordered,
         width = 10,
         height = 7,
         units = "in")
}

# export table with ranking of services (written once, when both strata are in)
write_csv(ranks_tib,
          paste0("outputs/table_responses_stratified_ordered_", curr_date, "_", curr_time, ".csv"))
```

```{r rank-visualisation-stratified}

# Side-by-side tile plots (PhD candidates vs other respondents): for each
# question area (x) and rank (y), the key_tib level (l1-l4) of the service
# that obtained that rank.

# one row per (area, stratum, rank) with the service ranked there
ranks_tib_long <- ranks_tib %>%
  pivot_longer(cols = starts_with("r"), names_to = "rank", values_to = "service")

# one row per (question, level) with the service name, shortened as in ranks_tib
key_tib_long <- key_tib %>%
  mutate(l1 = ifelse(l1 == "Data Management Plan support", "DMP support", l1)) %>%
  pivot_longer(cols = starts_with("l"), names_to = "level", values_to = "service")

rank_codes <- unique(ranks_tib_long$rank)

ranks_tib_plot <- ranks_tib_long %>%
  # Join on question AND service: joining on the service name alone would
  # duplicate rows if the same label were used under two questions.
  left_join(key_tib_long, by = c("area" = "question", "service")) %>%
  select(area, strat, rank, service, level) %>%
  mutate(
    # reversed levels so that rank 1 is drawn at the top of the y axis;
    # labels drop the "r" prefix
    rank = factor(rank,
                  levels = rev(rank_codes),
                  labels = rev(gsub("r", "", rank_codes))),
    strat = factor(strat,
                   levels = c("PhD Candidate", "non-PhD Candidate")),
    area = factor(area,
                  levels = q_order,
                  labels = c("Online information",
                             "Stewardship",
                             "IT support",
                             "Data sharing",
                             "Community",
                             "Training")),
    # "l1" -> "1", etc.
    level = gsub("l", "", level)
  )

# Tile plot of the rankings for one stratum of ranks_tib_plot.
build_tile_plot <- function(stratum) {
  ranks_tib_plot %>%
    filter(strat == stratum) %>%
    ggplot(aes(x = area, y = rank, fill = level)) +
    geom_tile(color = "black",
              lwd = .5,
              linetype = 1) +
    coord_fixed() +
    labs(fill = "Resources' availability level") +
    scale_fill_brewer(palette = "YlOrBr", direction = 1) +
    theme_minimal(base_size = 12) +
    theme(
      plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), units = "cm"),
      axis.text.x = element_text(size = 10.5, angle = 45, hjust = .9),
      axis.title.x = element_blank())
}

tile_plot_phd <- build_tile_plot("PhD Candidate")
tile_plot_nonphd <- build_tile_plot("non-PhD Candidate")

comp_tile_plot <- ggpubr::ggarrange(plotlist = list(tile_plot_phd, tile_plot_nonphd),
                                    legend = "bottom",
                                    ncol = 2,
                                    nrow = 1,
                                    align = "v",
                                    common.legend = TRUE,
                                    labels = "AUTO")

# Pass the composite explicitly instead of relying on ggsave()'s last-plot default.
ggsave(paste0("outputs/tile_plots_rankings_", curr_date, "_", curr_time, ".png"),
       plot = comp_tile_plot,
       width = 7,
       height = 3.33,
       units = "in")
```

```{r vis-old}
# plots_list <- list()
# breaks <- seq(0, max_counts_all, by = 1)  # Define breaks as integers from 1 to the highest count number
# labels <- as.character(breaks)  # Convert breaks to character for labels
# col_palette <- colorRampPalette(brewer.pal(9, "Oranges"))
# p <- p_data %>%   
  #   ggplot(aes(y = preference_level, x = service, size = n_scored, colour = n_scored)) +
  #   geom_point(size = 4) +
  #   scale_y_reverse() +
  #   scale_x_discrete(labels = c('1','2','3','4')) +
  #   # scale_size_continuous(breaks = breaks, labels = labels, guide = "none") + # Adjust the range of point sizes
  #   scale_colour_gradientn(colours = col_palette(length(breaks)), limits = range(breaks), breaks = breaks, labels = labels) +
  #   labs(
  #     title = question_name,
  #     # subtitle = "Dot size representing how many times the service received a preference level",
  #     y = "Preference Level",
  #     x = "Resources' Availability Level",
  #     size = "Number of responses",
  #     color = "Number of responses"  # Specify color legend title
  #   ) +
  #   theme_minimal() +
  #   theme(
  #     # axis.text.x = element_text(angle = 45, hjust = 1),
  #     panel.grid.major = element_line(color = "grey40", linewidth = 0.5, linetype = 1),
  #     panel.grid.minor = element_blank(),
  #     aspect.ratio=.6)
  # 
  # plots_list[[i]] <- p

# comp_plot <- ggpubr::ggarrange(plotlist = plots_list,
#                   legend = "bottom",
#                   common.legend = TRUE)
```
